Interactive plot of congressional word usage by party built with plotly


library(tidyverse)
library(plotly)

# data from Sunlight Foundation Capitol Words API

# Build one bar chart per phrase, stack the charts vertically on a shared
# x axis, and attach two dropdown menus that restyle the traces in place.
# `wordfreq` must already exist and provide the columns phrase, year, count
# and party.
wordfreq %>% split(.$phrase) %>% 
    # one plotly chart per phrase; bars are coloured by party
    map(plot_ly, type = 'bar', x = ~year, y = ~count, color = ~party, 
        colors = c('#4575b4', '#d73027')) %>% 
    # three rows with unequal heights, one row per phrase
    subplot(nrows = 3, shareX = TRUE, titleX = FALSE, titleY = TRUE, 
            heights = c(.166, .4, .434)) %>% 
    layout(title = 'Congressional Word Usage by Party',
           # plot area starts at 10% of the width, presumably to leave room
           # for the dropdown menus
           xaxis = list(domain = c(0.1, 1)), 
           # NOTE(review): these titles assume split() yields the phrases in
           # the order rich, middle class, poor. That holds only if `phrase`
           # is a factor with those levels (a character column would be
           # split alphabetically) -- confirm.
           yaxis = list(title = 'rich', dtick = 500, rangemode = 'tozero'),
           yaxis2 = list(title = 'middle class'), 
           yaxis3 = list(title = 'poor'),
           showlegend = FALSE, 
           updatemenus = list(
               # menu 1: switch the trace type of every trace
               list(yanchor = 'top', y = 1,
                    buttons = list(
                        list(method = "restyle",
                             args = list("type", "bar"),
                             label = "Bar"),
                        # "scatter" is the plotly trace type that draws
                        # lines; "line" is not a trace type
                        list(method = "restyle",
                             args = list("type", "scatter"),
                             label = "Line"))),
               # menu 2: choose how consecutive points are joined; this sets
               # the `line` attribute, which scatter traces use
               list(yanchor = 'top', y = 0.8,
                    buttons = list(
                        list(method = "restyle",
                             args = list('line', list(shape = 'linear')),
                             label = 'Linear'),
                        list(method = "restyle",
                             args = list('line', list(shape = 'hvh')),
                             label = 'Step'),
                        list(method = "restyle",
                             args = list('line', list(shape = 'spline')),
                             label = 'Spline')))))

Animation of DC home values built with ggplot2 and gganimate


library(tidyverse)
library(gganimate)

# data from Zillow
# Read the Zillow home value export and keep only the DC rows.
house <- read.csv('Zip_Zhvi_AllHomes.csv')

# Reshape to long form: every column whose name starts with 'X' becomes a
# (month, zhvi) pair, and pairs without a value are dropped.
# (Presumably these are the monthly columns, which read.csv prefixes with
# 'X' because their names begin with a digit -- confirm against the file.)
dczip <- house %>% filter(State == 'DC') %>% 
    gather(month, zhvi, starts_with('X')) %>% 
    filter(!is.na(zhvi))

# Lollipop chart of home value by zip code, one animation frame per month.
# The legend is suppressed through theme(legend.position = "none").
# ggplot() itself has no `show.legend` parameter, so none is passed here.
# NOTE(review): if RegionName is read as a number the x axis is continuous;
# confirm whether a discrete (factor) axis was intended.
plots <- ggplot(dczip, 
                aes(x = RegionName, y = zhvi, colour = zhvi, frame = month)) + 
    geom_point() + 
    # vertical stem from each point down to y = 0
    geom_segment(aes(xend = ..x.., yend = 0)) + 
    scale_colour_gradient(low = '#2171b5', high = '#e34a33') + 
    labs(x = 'Zip Code', y = 'Zillow Home Value Index', 
         title = 'Washington DC Home Values') + 
    scale_y_continuous(labels = scales::comma) + 
    theme(text = element_text(family = "Myriad Pro", colour = "gray30"),
          axis.title = element_text(size = 9, face = 'bold'),
          plot.title = element_text(face = 'bold'), 
          axis.ticks = element_line(colour = 'gray70'),
          legend.position = "none")

# Render the frames to an animated GIF.
gg_animate(plots, saver = 'gif',
           interval = 0.02, ani.type = 'svg', 
           ani.width = 600, ani.height = 400)

Leaflet map of candidates running for president from FEC data


library(dplyr)
library(leaflet)

# candidate data from http://fec.gov/data/CandidateSummary.do?format=html
# Load the candidate summary export.
candidates <- read.csv(file = "candidates.csv")

# Reduce the table to what the map needs: the coordinates plus one HTML
# popup per candidate. Rows without a latitude are dropped first. paste()
# joins the fragments with its default single-space separator.
candidates_for_map <- candidates %>%
    filter(!is.na(lat)) %>%
    transmute(
        lat,
        lng = lon,
        popup = paste(
            "<table><tr><th>Candidate:</th><th>", can_nam, "</th></tr>",
            "<tr><td>Address:</td><td>",
            can_str1, can_str2, "<br />",
            can_cit, can_sta, can_zip, "</td></tr>",
            "<tr><td>Net Contributions:</td><td>", net_con,
            "</td></tr></table>"
        )
    )

# Draw clustered markers on a CartoDB Positron base map, with the initial
# view set to longitude -95.71289, latitude 37.09024 at zoom level 4.
leaflet(data = candidates_for_map) %>%
    setView(lng = -95.71289, lat = 37.09024, zoom = 4) %>%
    addProviderTiles(provider = "CartoDB.Positron") %>%
    addMarkers(
        lng = ~lng,
        lat = ~lat,
        popup = ~popup,
        clusterOptions = markerClusterOptions()
    )

Trump presidential campaign disbursement sunburst

Legend

library(tidyverse)
library(sunburstR)

# disbursement data from http://fec.gov/data/CandidateSummary.do?format=html
# Read the operating-expenditure export, skipping the first 7 lines of the
# file, and parse the payment date and the amount into typed columns.
donald <- read_csv('Two_Year_Summary_Operating_Expenditures.csv',
                   skip = 7, trim_ws = TRUE,
                   col_types = cols(`Payment Date` = col_date(format = '%m/%d/%Y'),
                                    Amount = col_number()))

# Build the sunburst: each row becomes a '-'-separated path
# Category-Purpose-Payee-Year-Type, sized by Amount.
donald %>% 
    # Category is the leading word of Purpose
    mutate(Category = gsub('^(\\w+).*', '\\1', Purpose)) %>% 
    # '-' is used as the path separator below, so replace it with a space in
    # these three columns. vars() is used because a character vector
    # containing '`Payee Name`' (with literal backticks) is not matched as a
    # column name by dplyr >= 0.7.
    mutate_at(vars(Purpose, Category, `Payee Name`), 
              funs(gsub('-', ' ', .))) %>% 
    mutate(Categories = paste(Category, 
                              Purpose, 
                              `Payee Name`, 
                              `Report Year`, 
                              `Report Type`, sep = '-')) %>% 
    select(Categories, Amount) %>% 
    sunburst(count = TRUE)

About Me

Hi, I'm Edward! I enjoy munging, modeling, and visualizing data in R. In particular, I:

Munge

  • Import and export data
    • Call web APIs to retrieve up-to-date data
    • Scrape and parse HTML directly from websites
    • Connect to database backends
    • Write SQL queries
  • Data cleaning
    • Reshape data into a tidy form
    • Parse data with regular expressions (regex)

Model

  • Create and select features
  • Fit and tune machine learning models
    • Supervised learning
      • Regression (least squares, ridge, lasso, PCR, etc.)
      • Classification (logistic regression, LDA, QDA, KNN, etc.)
    • Text mining and natural language processing

Visualize

  • Make static visualizations:
    • Make base R and ggplot2 plots
    • Make maps and visualize geospatial data
  • Make dynamic visualizations:
    • Make responsive websites with Shiny
    • Use knitr, RMarkdown and FlexDashboard to make reproducible reports and websites with client-side interactivity (like this one!)
    • Make interactive maps and plots with leaflet, plotly, and the many exciting D3.js htmlwidgets under development
    • Make animated plots

Hire me!

Data I like

  • Politics
    • Policy
      • Macroeconomics
      • Foreign affairs
      • Trade
    • Campaigns
      • Field
  • Education

Contact me